iT邦幫忙

2025 iThome 鐵人賽

DAY 4
0
Rust

Rust 實戰專案集:30 個漸進式專案從工具到服務系列 第 4

目錄分析器 - 分析資料夾大小並產生報告

  • 分享至 

  • xImage
  •  

前言

在日常的程式開發和系統管理中,我們經常需要了解磁碟空間的使用情況。今天我們將建立一個目錄分析器,它能夠遞歸地分析指定目錄的大小,並生成詳細的報告。這個專案將幫助我們學習 Rust 的文件系統操作、錯誤處理、以及數據結構的使用。
專案目標

  • 遞歸遍歷目錄結構
  • 計算檔案和子目錄的大小
  • 生成格式化的分析報告
  • 支援多種輸出格式(文字、JSON)
  • 處理符號連結和權限問題

開專案囉~~~

cargo new directory_analyzer
cd directory_analyzer

依賴(Cargo.toml)

[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
clap = { version = "4.0", features = ["derive"] }
anyhow = "1.0"
colored = "2.0"

數據結構(data structure,定義於 src/main.rs,供其他模組以 `crate::` 引用)

use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::PathBuf;

/// One node of the scanned tree: either a single file or a directory with
/// its scanned children.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileInfo {
    /// Path of the entry as it was discovered during the scan.
    pub path: PathBuf,
    /// Size in bytes. For a directory this is the sum of the sizes of its
    /// scanned children.
    pub size: u64,
    /// `true` when the node represents a directory.
    pub is_dir: bool,
    /// Child nodes; `None` for files, `Some` (possibly empty) for directories.
    pub children: Option<Vec<FileInfo>>,
}

/// Aggregated result of analyzing one root path.
#[derive(Debug, Serialize, Deserialize)]
pub struct DirectoryReport {
    /// The path that was analyzed.
    pub root_path: PathBuf,
    /// Total size in bytes of everything that was scanned under the root.
    pub total_size: u64,
    /// Number of file nodes in the tree.
    pub file_count: usize,
    /// Number of directory nodes in the tree.
    pub dir_count: usize,
    /// The largest files found, biggest first.
    pub largest_files: Vec<FileInfo>,
    /// The full scanned tree, rooted at `root_path`.
    pub directory_tree: FileInfo,
    /// Total bytes per size bucket, keyed by the bucket's display label.
    pub size_distribution: HashMap<String, u64>,
}

src/analyzer.rs

use anyhow::{Context, Result};
use std::collections::HashMap;
use std::fs;
use std::path::Path;

use crate::{DirectoryReport, FileInfo};

/// Configuration for a recursive directory scan.
///
/// The derived `Default` is an analyzer with no depth limit that neither
/// follows symbolic links nor includes hidden entries.
#[derive(Debug, Clone, Default)]
pub struct DirectoryAnalyzer {
    /// Deepest level to descend into (`None` = unlimited).
    max_depth: Option<usize>,
    /// Whether symbolic links are followed instead of skipped.
    follow_symlinks: bool,
    /// Whether entries whose name starts with `.` are included.
    include_hidden: bool,
}

impl DirectoryAnalyzer {
    /// Creates an analyzer with no depth limit that skips symbolic links and
    /// hidden entries.
    pub fn new() -> Self {
        Self {
            max_depth: None,
            follow_symlinks: false,
            include_hidden: false,
        }
    }

    /// Limits how deep the scan descends. The analyzed root is depth 0;
    /// directories deeper than `depth` are listed but not read.
    pub fn max_depth(mut self, depth: usize) -> Self {
        self.max_depth = Some(depth);
        self
    }

    /// Chooses whether symbolic links are followed (`true`) or skipped (`false`).
    pub fn follow_symlinks(mut self, follow: bool) -> Self {
        self.follow_symlinks = follow;
        self
    }

    /// Chooses whether entries whose name starts with `.` are included.
    pub fn include_hidden(mut self, include: bool) -> Self {
        self.include_hidden = include;
        self
    }

    /// Scans `path` recursively and builds the aggregated report.
    ///
    /// # Errors
    ///
    /// Returns an error when `path` does not exist or when the root itself
    /// cannot be inspected or listed. Failures below the root are reported as
    /// warnings on stderr and the affected entry is skipped.
    pub fn analyze<P: AsRef<Path>>(&self, path: P) -> Result<DirectoryReport> {
        let path = path.as_ref();

        if !path.exists() {
            anyhow::bail!("路徑不存在: {}", path.display());
        }

        let directory_tree = self.scan_directory(path, 0)?;
        let stats = self.calculate_statistics(&directory_tree);

        Ok(DirectoryReport {
            root_path: path.to_path_buf(),
            total_size: directory_tree.size,
            file_count: stats.file_count,
            dir_count: stats.dir_count,
            largest_files: stats.largest_files,
            directory_tree,
            size_distribution: stats.size_distribution,
        })
    }

    /// Builds the tree node for `path`, recursing into directories.
    ///
    /// `depth` is the distance from the analyzed root (root = 0).
    ///
    /// NOTE: no cycle detection is performed here; when symbolic links are
    /// followed, a link that points back to an ancestor is re-scanned until
    /// the operating system refuses to resolve the path.
    fn scan_directory(&self, path: &Path, depth: usize) -> Result<FileInfo> {
        // Classify the entry *before* applying the depth limit. Checking the
        // limit first would report a file that sits just past the limit as an
        // empty directory (wrong kind, size 0, and an inflated directory count).
        let metadata = fs::metadata(path)
            .with_context(|| format!("無法讀取元數據: {}", path.display()))?;

        if metadata.is_file() {
            return Ok(FileInfo {
                path: path.to_path_buf(),
                size: metadata.len(),
                is_dir: false,
                children: None,
            });
        }

        // Past the depth limit: list the directory but do not read it.
        if matches!(self.max_depth, Some(max_depth) if depth > max_depth) {
            return Ok(FileInfo {
                path: path.to_path_buf(),
                size: 0,
                is_dir: true,
                children: Some(Vec::new()),
            });
        }

        let read_dir = fs::read_dir(path)
            .with_context(|| format!("無法讀取目錄: {}", path.display()))?;

        let mut children = Vec::new();
        let mut total_size = 0u64;

        for entry in read_dir {
            // A single unreadable entry must not discard the rest of the
            // directory; treat it like any other per-entry failure.
            let entry = match entry {
                Ok(entry) => entry,
                Err(e) => {
                    eprintln!("警告: 無法讀取 {} 中的項目: {}", path.display(), e);
                    continue;
                }
            };
            let entry_path = entry.path();

            // Skip hidden entries unless they were requested.
            if !self.include_hidden && self.is_hidden(&entry_path) {
                continue;
            }

            // Skip symbolic links unless they should be followed.
            if !self.follow_symlinks && entry_path.is_symlink() {
                continue;
            }

            match self.scan_directory(&entry_path, depth + 1) {
                Ok(child_info) => {
                    total_size += child_info.size;
                    children.push(child_info);
                }
                Err(e) => {
                    // Best effort: report and keep scanning the siblings.
                    eprintln!("警告: 跳過 {}: {}", entry_path.display(), e);
                }
            }
        }

        Ok(FileInfo {
            path: path.to_path_buf(),
            size: total_size,
            is_dir: true,
            children: Some(children),
        })
    }

    /// Returns `true` when the final path component starts with `.`.
    ///
    /// Paths without a final component, or whose name is not valid UTF-8,
    /// are treated as not hidden.
    fn is_hidden(&self, path: &Path) -> bool {
        path.file_name()
            .and_then(|name| name.to_str())
            .map(|name| name.starts_with('.'))
            .unwrap_or(false)
    }

    /// Walks `tree` and returns its counts, size distribution and the ten
    /// largest files (biggest first).
    fn calculate_statistics(&self, tree: &FileInfo) -> DirectoryStats {
        let mut stats = DirectoryStats::new();
        self.collect_stats(tree, &mut stats);
        stats.largest_files.sort_by(|a, b| b.size.cmp(&a.size));
        stats.largest_files.truncate(10); // keep only the ten largest files
        stats
    }

    /// Accumulates statistics for `node` and everything below it.
    ///
    /// Every directory node is counted, including the one this is first
    /// called on.
    fn collect_stats(&self, node: &FileInfo, stats: &mut DirectoryStats) {
        if node.is_dir {
            stats.dir_count += 1;
            if let Some(children) = &node.children {
                for child in children {
                    self.collect_stats(child, stats);
                }
            }
        } else {
            stats.file_count += 1;
            stats.largest_files.push(node.clone());

            // Add the file's bytes to its size bucket.
            let size_category = self.categorize_size(node.size);
            *stats.size_distribution.entry(size_category).or_insert(0) += node.size;
        }
    }

    /// Maps a size in bytes to the display label of its bucket.
    ///
    /// The first bucket is strictly below 1 KiB, so a file of exactly 1024
    /// bytes falls into "1KB - 1MB". The upper bounds of the remaining ranged
    /// buckets are inclusive, and "> 1GB" is strictly greater than 1 GiB.
    fn categorize_size(&self, size: u64) -> String {
        const KIB: u64 = 1024;
        const MIB: u64 = 1024 * KIB;
        const GIB: u64 = 1024 * MIB;

        let label = if size < KIB {
            "< 1KB"
        } else if size <= MIB {
            "1KB - 1MB"
        } else if size <= 100 * MIB {
            "1MB - 100MB"
        } else if size <= GIB {
            "100MB - 1GB"
        } else {
            "> 1GB"
        };

        label.to_string()
    }
}

/// Running totals gathered while walking a scanned tree.
#[derive(Debug, Default)]
struct DirectoryStats {
    /// Number of file nodes seen so far.
    file_count: usize,
    /// Number of directory nodes seen so far.
    dir_count: usize,
    /// Candidate list for the "largest files" section of the report.
    largest_files: Vec<FileInfo>,
    /// Total bytes per size bucket, keyed by the bucket's display label.
    size_distribution: HashMap<String, u64>,
}

impl DirectoryStats {
    /// Creates empty statistics: zero counts and empty collections.
    fn new() -> Self {
        Self::default()
    }
}

報告格式化

src/formatter.rs

use crate::{DirectoryReport, FileInfo};
use colored::*;
use std::io::Write;

/// Turns a [`DirectoryReport`] into its final textual representation.
pub trait ReportFormatter {
    /// Renders `report` and returns the complete output as one string.
    fn format_report(&self, report: &DirectoryReport) -> String;
}

/// Human-readable report formatter with an optional tree view and optional
/// colored output.
pub struct TextFormatter {
    /// Whether the directory tree section is appended to the report.
    show_tree: bool,
    /// Whether headings, directory names and sizes are colorized.
    use_colors: bool,
}

impl TextFormatter {
    /// Creates a formatter that shows the tree and uses colors.
    pub fn new() -> Self {
        Self {
            show_tree: true,
            use_colors: true,
        }
    }

    /// Enables or disables the directory tree section.
    pub fn show_tree(mut self, show: bool) -> Self {
        self.show_tree = show;
        self
    }

    /// Enables or disables colored output.
    pub fn use_colors(mut self, use_colors: bool) -> Self {
        self.use_colors = use_colors;
        self
    }

    /// Formats a byte count using 1024-based units from `B` up to `TB`.
    ///
    /// Values below 1024 are printed as exact integers; larger values are
    /// printed with two decimals.
    fn format_size(&self, size: u64) -> String {
        const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB"];
        let mut size_f = size as f64;
        let mut unit_index = 0;

        // Stop at the last unit so very large values stay expressed in TB.
        while size_f >= 1024.0 && unit_index < UNITS.len() - 1 {
            size_f /= 1024.0;
            unit_index += 1;
        }

        if unit_index == 0 {
            format!("{} {}", size, UNITS[unit_index])
        } else {
            format!("{:.2} {}", size_f, UNITS[unit_index])
        }
    }

    /// Renders `node` and its descendants as an indented tree.
    ///
    /// `prefix` is the indentation inherited from the ancestors and `is_last`
    /// tells whether `node` is the last child of its parent (which selects
    /// the connector glyph).
    fn format_tree(&self, node: &FileInfo, prefix: &str, is_last: bool) -> String {
        let mut output = String::new();
        self.write_tree(&mut output, node, prefix, is_last);
        output
    }

    /// Appends the tree rendering of `node` to `output`.
    ///
    /// Writing into one shared buffer avoids re-copying every subtree's text
    /// once per ancestor level.
    fn write_tree(&self, output: &mut String, node: &FileInfo, prefix: &str, is_last: bool) {
        let connector = if is_last { "└── " } else { "├── " };

        // Paths such as `.` or `/` have no final component; fall back to the
        // path itself instead of a placeholder. Lossy conversion keeps names
        // that are not valid UTF-8 recognizable.
        let display_name = node
            .path
            .file_name()
            .map(|n| n.to_string_lossy())
            .unwrap_or_else(|| node.path.to_string_lossy());
        let name: &str = &display_name;

        let styled_name = if self.use_colors && node.is_dir {
            name.blue().bold()
        } else {
            name.normal()
        };

        let size_str = self.format_size(node.size);
        let styled_size = if self.use_colors {
            size_str.as_str().green()
        } else {
            size_str.as_str().normal()
        };

        output.push_str(&format!(
            "{}{}{} ({})\n",
            prefix, connector, styled_name, styled_size
        ));

        if let Some(children) = &node.children {
            // Below a last child there is nothing left to connect, so the
            // vertical guide is replaced by blanks.
            let child_prefix = format!("{}{}", prefix, if is_last { "    " } else { "│   " });
            let last_index = children.len().saturating_sub(1);

            for (i, child) in children.iter().enumerate() {
                self.write_tree(output, child, &child_prefix, i == last_index);
            }
        }
    }
}

impl ReportFormatter for TextFormatter {
    /// Renders the report as text: title, summary, size distribution, largest
    /// files and (optionally) the directory tree.
    ///
    /// The size distribution is printed in a fixed smallest-to-largest bucket
    /// order so that repeated runs over the same data produce identical
    /// output; iterating the map directly would print it in arbitrary order.
    fn format_report(&self, report: &DirectoryReport) -> String {
        // Display order of the known size buckets, smallest first.
        const CATEGORY_ORDER: [&str; 5] = [
            "< 1KB",
            "1KB - 1MB",
            "1MB - 100MB",
            "100MB - 1GB",
            "> 1GB",
        ];

        // Position of a label in the display order; unknown labels sort last.
        fn category_rank(category: &str) -> usize {
            CATEGORY_ORDER
                .iter()
                .position(|known| *known == category)
                .unwrap_or(CATEGORY_ORDER.len())
        }

        // Section headings share one style: bold yellow when colors are on.
        let heading = |text: &str| -> String {
            if self.use_colors {
                format!("{}\n", text.yellow().bold())
            } else {
                format!("{}\n", text)
            }
        };

        let mut output = String::new();

        // Title
        let title = "目錄分析報告";
        if self.use_colors {
            output.push_str(&format!("{}\n", title.cyan().bold()));
        } else {
            output.push_str(&format!("{}\n", title));
        }
        output.push_str(&"=".repeat(50));
        output.push_str("\n\n");

        // Summary
        output.push_str(&format!("分析路徑: {}\n", report.root_path.display()));
        output.push_str(&format!("總大小: {}\n", self.format_size(report.total_size)));
        output.push_str(&format!("文件數量: {}\n", report.file_count));
        output.push_str(&format!("目錄數量: {}\n", report.dir_count));
        output.push_str("\n");

        // Size distribution, in a stable order
        output.push_str(&heading("大小分佈:"));

        let mut distribution: Vec<(&str, u64)> = report
            .size_distribution
            .iter()
            .map(|(category, size)| (category.as_str(), *size))
            .collect();
        distribution.sort_by(|a, b| {
            category_rank(a.0)
                .cmp(&category_rank(b.0))
                .then_with(|| a.0.cmp(b.0))
        });

        for (category, size) in distribution {
            output.push_str(&format!("  {}: {}\n", category, self.format_size(size)));
        }
        output.push_str("\n");

        // Largest files
        output.push_str(&heading("前10大文件:"));

        for (i, file) in report.largest_files.iter().take(10).enumerate() {
            output.push_str(&format!(
                "{}. {} - {}\n",
                i + 1,
                file.path.display(),
                self.format_size(file.size)
            ));
        }
        output.push_str("\n");

        // Directory tree
        if self.show_tree {
            output.push_str(&heading("目錄結構:"));
            output.push_str(&self.format_tree(&report.directory_tree, "", true));
        }

        output
    }
}

/// Formatter that emits the report as pretty-printed JSON.
pub struct JsonFormatter;

impl ReportFormatter for JsonFormatter {
    /// Serializes `report` to indented JSON.
    ///
    /// If serialization fails, the returned string is the error message
    /// rather than JSON.
    fn format_report(&self, report: &DirectoryReport) -> String {
        match serde_json::to_string_pretty(report) {
            Ok(json) => json,
            Err(err) => format!("JSON 序列化錯誤: {}", err),
        }
    }
}

src/main.rs

mod analyzer;
mod formatter;

use analyzer::DirectoryAnalyzer;
use formatter::{JsonFormatter, ReportFormatter, TextFormatter};
use anyhow::Result;
use clap::{Parser, ValueEnum};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::PathBuf;

/// One node of the scanned tree: either a single file or a directory with
/// its scanned children.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileInfo {
    /// Path of the entry as it was discovered during the scan.
    pub path: PathBuf,
    /// Size in bytes. For a directory this is the sum of the sizes of its
    /// scanned children.
    pub size: u64,
    /// `true` when the node represents a directory.
    pub is_dir: bool,
    /// Child nodes; `None` for files, `Some` (possibly empty) for directories.
    pub children: Option<Vec<FileInfo>>,
}

/// Aggregated result of analyzing one root path.
#[derive(Debug, Serialize, Deserialize)]
pub struct DirectoryReport {
    /// The path that was analyzed.
    pub root_path: PathBuf,
    /// Total size in bytes of everything that was scanned under the root.
    pub total_size: u64,
    /// Number of file nodes in the tree.
    pub file_count: usize,
    /// Number of directory nodes in the tree.
    pub dir_count: usize,
    /// The largest files found, biggest first.
    pub largest_files: Vec<FileInfo>,
    /// The full scanned tree, rooted at `root_path`.
    pub directory_tree: FileInfo,
    /// Total bytes per size bucket, keyed by the bucket's display label.
    pub size_distribution: HashMap<String, u64>,
}

// Report formats selectable with `--format`.
// Plain `//` comments are used on purpose: clap's derive turns `///` doc
// comments on value-enum variants into help text, which would change the
// program's `--help` output.
#[derive(Debug, Clone, ValueEnum)]
enum OutputFormat {
    // Human-readable report rendered by `TextFormatter`.
    Text,
    // Machine-readable report rendered by `JsonFormatter`.
    Json,
}

// Command-line arguments, parsed by clap's derive API.
//
// The `///` doc comments on the fields below are turned into the `--help`
// text by clap, so they are user-facing strings and are intentionally kept
// exactly as written. Explanatory notes use plain `//` comments instead.
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Args {
    // Positional argument: the path to analyze.
    /// 要分析的目錄路徑
    #[arg(value_name = "PATH")]
    path: PathBuf,

    // `-f` / `--format`: output format, text unless specified.
    /// 輸出格式
    #[arg(short, long, value_enum, default_value_t = OutputFormat::Text)]
    format: OutputFormat,

    // `-m` / `--max-depth`: optional recursion limit; unlimited when absent.
    /// 最大遞歸深度
    #[arg(short, long)]
    max_depth: Option<usize>,

    // `-L` / `--follow-symlinks`: follow symbolic links instead of skipping them.
    /// 跟隨符號連結
    #[arg(short = 'L', long)]
    follow_symlinks: bool,

    // `-a` / `--include-hidden`: include entries whose name starts with `.`.
    /// 包含隱藏文件
    #[arg(short = 'a', long)]
    include_hidden: bool,

    // `--no-tree`: omit the directory tree section from the text report.
    /// 不顯示目錄樹
    #[arg(long)]
    no_tree: bool,

    // `--no-color`: disable colored output in the text report.
    /// 不使用顏色
    #[arg(long)]
    no_color: bool,

    // `-o` / `--output`: write the report to this file instead of stdout.
    /// 輸出到文件
    #[arg(short, long)]
    output: Option<PathBuf>,
}

/// Entry point: parses the command line, runs the analysis and emits the
/// report either to stdout or to the file given with `--output`.
///
/// # Errors
///
/// Returns an error when the analysis of the root path fails or when the
/// report cannot be written to the requested output file.
fn main() -> Result<()> {
    let args = Args::parse();

    // Configure the analyzer from the command-line flags.
    let mut analyzer = DirectoryAnalyzer::new()
        .follow_symlinks(args.follow_symlinks)
        .include_hidden(args.include_hidden);

    if let Some(depth) = args.max_depth {
        analyzer = analyzer.max_depth(depth);
    }

    // Run the analysis. The progress message goes to stderr so that stdout
    // carries only the report; otherwise piping `--format json` into another
    // tool would receive a non-JSON first line.
    eprintln!("正在分析目錄: {}", args.path.display());
    let report = analyzer.analyze(&args.path)?;

    // Render the report in the requested format.
    let formatted_report = match args.format {
        OutputFormat::Text => {
            let formatter = TextFormatter::new()
                .show_tree(!args.no_tree)
                .use_colors(!args.no_color);
            formatter.format_report(&report)
        }
        OutputFormat::Json => {
            let formatter = JsonFormatter;
            formatter.format_report(&report)
        }
    };

    // Emit the result.
    if let Some(output_path) = args.output {
        std::fs::write(&output_path, &formatted_report)?;
        println!("報告已保存到: {}", output_path.display());
    } else {
        print!("{}", formatted_report);
    }

    Ok(())
}

開 try 囉

cargo build --release

# 分析當前目錄
cargo run -- .

# 分析特定目錄,限制深度為3
cargo run -- /home/user/documents --max-depth 3

# 包含隱藏文件並輸出為JSON
cargo run -- . --include-hidden --format json

# 將報告保存到文件
cargo run -- . --output report.txt

# 跟隨符號連結
cargo run -- . --follow-symlinks

好誒


上一篇
檔案搜尋工具 - 實作類似 grep 的文字搜尋功能
下一篇
密碼產生器 - 可自訂規則的安全密碼產生工具
系列文
Rust 實戰專案集:30 個漸進式專案從工具到服務5
圖片
  熱門推薦
圖片
{{ item.channelVendor }} | {{ item.webinarstarted }} |
{{ formatDate(item.duration) }}
直播中

尚未有邦友留言

立即登入留言